/***
This do-file builds the county-level Median Household Income dataset
(ACS 2014-2018 5-year estimates, table S1903) from the CSV downloaded from
the Census Bureau.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root. r(buildrunning) must be read right after the -project- call that
* returns it; when it is 0 the interactive configuration is loaded instead
* (presumably the do-file is being run outside a project build -- confirm).
project figstabs, root
if r(buildrunning) == 0 include "${root}/code/config_interactive.do"

* Make sure the output folders exist. -capture- swallows the error that
* -mkdir- raises when a folder is already there.
local acs_county_dir "${root}/data/derived/ACS 2014-2018 5-Year County"
capture mkdir "`acs_county_dir'"
capture mkdir "`acs_county_dir'/Individual Variables"

*-------------------------------------------------------------------------------
* Load and clean raw data
*-------------------------------------------------------------------------------

* -unzipfile- extracts into the current working directory, so move to the
* derived-data folder first; the CSV is then read back from that folder.
cd "${root}/data/derived/ACS 2014-2018 5-Year County/Individual Variables"

* Register the raw zip archive with -project- and unpack it.
project, uses("${root}/data/dvc/ACS 2014-2018 5-Year County/Median Household Income/ACSST5Y2018.S1903_2020-08-21T132427.zip") raw
unzipfile "${root}/data/dvc/ACS 2014-2018 5-Year County/Median Household Income/ACSST5Y2018.S1903_2020-08-21T132427.zip", replace

* Register the extracted CSV and load it, taking variable names from row 1.
project, uses("${root}/data/derived/ACS 2014-2018 5-Year County/Individual Variables/ACSST5Y2018.S1903_data_with_overlays_2020-08-21T132331.csv")
import delimited "${root}/data/derived/ACS 2014-2018 5-Year County/Individual Variables/ACSST5Y2018.S1903_data_with_overlays_2020-08-21T132331.csv", varnames(1) clear

* Remove sections of the geo_id variable other than the county FIPS.
replace geo_id = subinstr(geo_id, "0500000US", "", .)

* Rename the geographic variables for clarity.
rename geo_id county_fips
rename name county_name

* Every county row must now carry a 5-character FIPS code. Observation 1 is
* exempt because it holds the variable descriptions, not a county.
assert strlen(county_fips) == 5 if _n > 1

* Drop the row containing descriptions of variables.
drop in 1

* Keep only the county identifiers and the median household income estimate.
keep county_fips county_name s1903_c03_001e
rename s1903_c03_001e medhhinc_2014_2018_est

* Replace the non-numeric missing-value marker with an empty string so that
* it becomes a numeric missing value when destrung.
replace medhhinc_2014_2018_est = "" if medhhinc_2014_2018_est == "null"

* Convert the FIPS code and the income estimate to numeric (county_name
* stays a string).
destring county_fips, replace
destring medhhinc_2014_2018_est, replace

* Without the force option, -destring- does not raise an error when it meets
* a non-numeric value: it just leaves the variable as a string. Stop here in
* that case instead of silently saving string variables.
confirm numeric variable county_fips medhhinc_2014_2018_est

*-------------------------------------------------------------------------------
* Save file
*-------------------------------------------------------------------------------

* Hold the output path in one place so the file that is saved and the file
* registered with -project- are guaranteed to be the same.
local medhhinc_dta "${root}/data/derived/ACS 2014-2018 5-Year County/Individual Variables/ACS 2014-2018 Median Household Income.dta"

save "`medhhinc_dta'", replace
project, creates("`medhhinc_dta'")
